From b87a9fd7f96514af8ae7d752ecdc3d13b9167193 Mon Sep 17 00:00:00 2001
From: "cl349@freefall.cl.cam.ac.uk"
Date: Mon, 8 Nov 2004 15:29:51 +0000
Subject: [PATCH] bitkeeper revision 1.1159.1.392 (418f90efoPSUCiX94PZ6xsvTCedrgQ)

Support compiling with CONFIG_SMP.
---
 .rootkeys                                      |    4 +
 linux-2.6.9-xen-sparse/arch/xen/i386/Kconfig   |   75 +-
 .../arch/xen/i386/kernel/Makefile              |    7 +-
 .../arch/xen/i386/kernel/cpu/common.c          |    6 +
 .../arch/xen/i386/kernel/smp.c                 |  646 +++++++++
 .../arch/xen/i386/kernel/smpboot.c             | 1209 +++++++++++++++++
 .../arch/xen/i386/kernel/time.c                |    5 +
 .../arch/xen/kernel/Makefile                   |    2 +-
 linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c   |   19 +
 .../drivers/xen/console/console.c              |    2 +-
 .../asm-xen/asm-i386/mach-xen/irq_vectors.h    |    2 +-
 .../asm-xen/asm-i386/mach-xen/smpboot_hooks.h  |   56 +
 .../include/asm-xen/asm-i386/msr.h             |    7 +-
 13 files changed, 1992 insertions(+), 48 deletions(-)
 create mode 100644 linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c
 create mode 100644 linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c
 create mode 100644 linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c
 create mode 100644 linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h

diff --git a/.rootkeys b/.rootkeys
index 0b9dde39da..0aeff963a8 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -148,6 +148,8 @@
 40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.9-xen-sparse/arch/xen/i386/kernel/process.c
 40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c
 40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/signal.c
+41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c
+41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c
 40f56238UL9uv78ODDzMwLL9yryeFw linux-2.6.9-xen-sparse/arch/xen/i386/kernel/sysenter.c
 40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/time.c
 40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.9-xen-sparse/arch/xen/i386/kernel/timers/Makefile
@@ -175,6 +177,7 @@
 412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.9-xen-sparse/arch/xen/kernel/gnttab.c
 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.9-xen-sparse/arch/xen/kernel/reboot.c
 414c113396tK1HTVeUalm3u-1DF16g linux-2.6.9-xen-sparse/arch/xen/kernel/skbuff.c
+418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c
 3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.9-xen-sparse/arch/xen/kernel/xen_proc.c
 41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.9-xen-sparse/drivers/Makefile
 4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.9-xen-sparse/drivers/char/mem.c
@@ -216,6 +219,7 @@
 40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
 40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
 40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
 4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
 40f5623aFTyFTR-vdiA-KaGxk5JOKQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/msr.h
 40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/page.h
diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/Kconfig b/linux-2.6.9-xen-sparse/arch/xen/i386/Kconfig
index 6e22f7ca21..a971177164 100644
--- a/linux-2.6.9-xen-sparse/arch/xen/i386/Kconfig
+++ b/linux-2.6.9-xen-sparse/arch/xen/i386/Kconfig
@@ -322,36 +322,33 @@ config HPET_EMULATE_RTC
 	def_bool HPET_TIMER && RTC=y
 
 config SMP
-	bool
-	default n
-#config SMP
-#	bool "Symmetric multi-processing support"
-#	---help---
-#	  This enables support for systems with more than one CPU. If you have
-#	  a system with only one CPU, like most personal computers, say N. If
-#	  you have a system with more than one CPU, say Y.
-#
-#	  If you say N here, the kernel will run on single and multiprocessor
-#	  machines, but will use only one CPU of a multiprocessor machine. If
-#	  you say Y here, the kernel will run on many, but not all,
-#	  singleprocessor machines. On a singleprocessor machine, the kernel
-#	  will run faster if you say N here.
-#
-#	  Note that if you say Y here and choose architecture "586" or
-#	  "Pentium" under "Processor family", the kernel will not work on 486
-#	  architectures. Similarly, multiprocessor kernels for the "PPro"
-#	  architecture may not work on all Pentium based boards.
-#
-#	  People using multiprocessor machines who say Y here should also say
-#	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
-#	  Management" code will be disabled if you say Y here.
-#
-#	  See also the ,
-#	  ,
-#	  and the SMP-HOWTO available at
-#	  .
-#
-#	  If you don't know what to do here, say N.
+	bool "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, like most personal computers, say N. If
+	  you have a system with more than one CPU, say Y.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on many, but not all,
+	  singleprocessor machines. On a singleprocessor machine, the kernel
+	  will run faster if you say N here.
+
+	  Note that if you say Y here and choose architecture "586" or
+	  "Pentium" under "Processor family", the kernel will not work on 486
+	  architectures. Similarly, multiprocessor kernels for the "PPro"
+	  architecture may not work on all Pentium based boards.
+
+	  People using multiprocessor machines who say Y here should also say
+	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+	  Management" code will be disabled if you say Y here.
+
+	  See also the ,
+	  ,
+	  and the SMP-HOWTO available at
+	  .
+
+	  If you don't know what to do here, say N.
 
 config NR_CPUS
 	int "Maximum number of CPUs (2-255)"
@@ -632,6 +629,11 @@ config REGPARM
 	  -mregparm=3 is used.
+config X86_LOCAL_APIC + bool + depends on (X86_VISWS || SMP) && !X86_VOYAGER + default n + if XEN_PHYSDEV_ACCESS menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" @@ -641,11 +643,6 @@ config X86_VISWS_APIC depends on X86_VISWS default y -config X86_LOCAL_APIC - bool - depends on (X86_VISWS || SMP) && !X86_VOYAGER - default y - config X86_IO_APIC bool depends on SMP && !(X86_VISWS || X86_VOYAGER) @@ -908,10 +905,10 @@ config X86_SMP depends on SMP && !X86_VOYAGER default y -config X86_HT - bool - depends on SMP && !(X86_VISWS || X86_VOYAGER) - default y +#config X86_HT +# bool +# depends on SMP && !(X86_VISWS || X86_VOYAGER) +# default y config X86_BIOS_REBOOT bool diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/Makefile b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/Makefile index 3124b25b99..b069c8a5f2 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/Makefile +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/Makefile @@ -26,10 +26,11 @@ c-obj-$(CONFIG_X86_MSR) += msr.o c-obj-$(CONFIG_X86_CPUID) += cpuid.o c-obj-$(CONFIG_MICROCODE) += microcode.o c-obj-$(CONFIG_APM) += apm.o -c-obj-$(CONFIG_X86_SMP) += smp.o smpboot.o -c-obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_X86_SMP) += smp.o smpboot.o +#obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o c-obj-$(CONFIG_X86_MPPARSE) += mpparse.o -c-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +#obj-$(CONFIG_X86_LOCAL_APIC) += apic.o +c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o c-obj-$(CONFIG_X86_IO_APIC) += io_apic.o c-obj-$(CONFIG_X86_NUMAQ) += numaq.o c-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c index a59d760eb7..2a2322d80e 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c @@ -604,3 +604,9 @@ void __init cpu_init (void) current->used_math = 0; mxcsr_feature_mask_init(); } + + +int get_smp_processor_id(void) +{ + return smp_processor_id(); +} diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c new file mode 100644 index 0000000000..31b4ed6838 --- /dev/null +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c @@ -0,0 +1,646 @@ +/* + * Intel SMP support routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * + * This code is released under the GNU General Public License version 2 or + * later. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#if 0 +#include +#endif + +/* + * Some notes on x86 processor bugs affecting SMP operation: + * + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. + * The Linux implications for SMP are handled as follows: + * + * Pentium III / [Xeon] + * None of the E1AP-E3AP errata are visible to the user. + * + * E1AP. see PII A1AP + * E2AP. see PII A2AP + * E3AP. see PII A3AP + * + * Pentium II / [Xeon] + * None of the A1AP-A3AP errata are visible to the user. + * + * A1AP. see PPro 1AP + * A2AP. see PPro 2AP + * A3AP. see PPro 7AP + * + * Pentium Pro + * None of 1AP-9AP errata are visible to the normal user, + * except occasional delivery of 'spurious interrupt' as trap #15. + * This is very rare and a non-problem. + * + * 1AP. Linux maps APIC as non-cacheable + * 2AP. worked around in hardware + * 3AP. fixed in C0 and above steppings microcode update. + * Linux does not use excessive STARTUP_IPIs. + * 4AP. 
worked around in hardware + * 5AP. symmetric IO mode (normal Linux operation) not affected. + * 'noapic' mode has vector 0xf filled out properly. + * 6AP. 'noapic' mode might be affected - fixed in later steppings + * 7AP. We do not assume writes to the LVT deassering IRQs + * 8AP. We do not enable low power mode (deep sleep) during MP bootup + * 9AP. We do not use mixed mode + * + * Pentium + * There is a marginal case where REP MOVS on 100MHz SMP + * machines with B stepping processors can fail. XXX should provide + * an L1cache=Writethrough or L1cache=off option. + * + * B stepping CPUs may hang. There are hardware work arounds + * for this. We warn about it in case your board doesn't have the work + * arounds. Basically thats so I can tell anyone with a B stepping + * CPU and SMP problems "tough". + * + * Specific items [From Pentium Processor Specification Update] + * + * 1AP. Linux doesn't use remote read + * 2AP. Linux doesn't trust APIC errors + * 3AP. We work around this + * 4AP. Linux never generated 3 interrupts of the same priority + * to cause a lost local interrupt. + * 5AP. Remote read is never used + * 6AP. not affected - worked around in hardware + * 7AP. not affected - worked around in hardware + * 8AP. worked around in hardware - we get explicit CS errors if not + * 9AP. only 'noapic' mode affected. Might generate spurious + * interrupts, we log only the first one and count the + * rest silently. + * 10AP. not affected - worked around in hardware + * 11AP. Linux reads the APIC between writes to avoid this, as per + * the documentation. Make sure you preserve this as it affects + * the C stepping chips too. + * 12AP. not affected - worked around in hardware + * 13AP. not affected - worked around in hardware + * 14AP. we always deassert INIT during bootup + * 15AP. not affected - worked around in hardware + * 16AP. not affected - worked around in hardware + * 17AP. not affected - worked around in hardware + * 18AP. not affected - worked around in hardware + * 19AP. not affected - worked around in BIOS + * + * If this sounds worrying believe me these bugs are either ___RARE___, + * or are signal timing bugs worked around in hardware and there's + * about nothing of note with C stepping upwards. + */ + +DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline int __prepare_ICR (unsigned int shortcut, int vector) +{ + return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; +} + +static inline int __prepare_ICR2 (unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +void __send_IPI_shortcut(unsigned int shortcut, int vector) +{ +#if 1 + printk("__send_IPI_shortcut\n"); +#else + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. 
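 *
 * (For reference: apic_wait_icr_idle(), used by each of these senders,
 *  amounts to a busy-wait of roughly this shape in the native i386 code
 *  of this era -- shown here only as an illustrative sketch:
 *
 *	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 *		cpu_relax();
 *
 *  i.e. the sender spins until the previous IPI has left the ICR before
 *  programming a new one.)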
+ */ + apic_write_around(APIC_ICR, cfg); +#endif +} + +void fastcall send_IPI_self(int vector) +{ + __send_IPI_shortcut(APIC_DEST_SELF, vector); +} + +/* + * This is only used on smaller machines. + */ +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) +{ +#if 1 + printk("send_IPI_mask_bitmask\n"); +#else + unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long cfg; + unsigned long flags; + + local_irq_save(flags); + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + + local_irq_restore(flags); +#endif +} + +inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +{ +#if 1 + printk("send_IPI_mask_sequence\n"); +#else + unsigned long cfg, flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + + for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { + if (cpu_isset(query_cpu, mask)) { + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu)); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + } + } + local_irq_restore(flags); +#endif +} + +#include /* must come after the send_IPI functions above for inlining */ + +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul + */ + +static cpumask_t flush_cpumask; +static struct mm_struct * flush_mm; +static unsigned long flush_va; +static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED; +#define FLUSH_ALL 0xffffffff + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. + * + * We need to reload %cr3 since the page tables may be going + * away from under us.. + */ +static inline void leave_mm (unsigned long cpu) +{ + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) + BUG(); + cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); + load_cr3(swapper_pg_dir); +} + +/* + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); + * Stop ipi delivery for the old mm. This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superflous + * tlb flush. + * 1a2) set cpu_tlbstate to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu_tlbstate[].active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. 
+ * 1b) thread switch without mm change + * cpu_tlbstate[].active_mm is correct, cpu0 already handles + * flush ipis. + * 1b1) set cpu_tlbstate to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu_tlbstate is local to each cpu, no + * write/read ordering problems. + */ + +/* + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. + */ + +asmlinkage void smp_invalidate_interrupt (void) +{ + unsigned long cpu; + + cpu = get_cpu(); + + if (!cpu_isset(cpu, flush_cpumask)) + goto out; + /* + * This was a BUG() but until someone can quote me the + * line from the intel manual that guarantees an IPI to + * multiple CPUs is retried _only_ on the erroring CPUs + * its staying as a return + * + * BUG(); + */ + + if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { + if (flush_va == FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(flush_va); + } else + leave_mm(cpu); + } +#if 1 + printk("smp_invalidate_interrupt ack_APIC_irq\n"); +#else + ack_APIC_irq(); +#endif + smp_mb__before_clear_bit(); + cpu_clear(cpu, flush_cpumask); + smp_mb__after_clear_bit(); +out: + put_cpu_no_resched(); +} + +static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va) +{ + cpumask_t tmp; + /* + * A couple of (to be removed) sanity checks: + * + * - we do not send IPIs to not-yet booted CPUs. + * - current CPU must not be in mask + * - mask must exist :) + */ + BUG_ON(cpus_empty(cpumask)); + + cpus_and(tmp, cpumask, cpu_online_map); + BUG_ON(!cpus_equal(cpumask, tmp)); + BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(!mm); + + /* + * i'm not happy about this global shared spinlock in the + * MM hot path, but we'll see how contended it is. + * Temporarily this turns IRQs off, so that lockups are + * detected by the NMI watchdog. + */ + spin_lock(&tlbstate_lock); + + flush_mm = mm; + flush_va = va; +#if NR_CPUS <= BITS_PER_LONG + atomic_set_mask(cpumask, &flush_cpumask); +#else + { + int k; + unsigned long *flush_mask = (unsigned long *)&flush_cpumask; + unsigned long *cpu_mask = (unsigned long *)&cpumask; + for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) + atomic_set_mask(cpu_mask[k], &flush_mask[k]); + } +#endif + /* + * We have to send the IPI only to + * CPUs affected. + */ + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + + while (!cpus_empty(flush_cpumask)) + /* nothing. 
lockup detection does not belong here */ + mb(); + + flush_mm = NULL; + flush_va = 0; + spin_unlock(&tlbstate_lock); +} + +void flush_tlb_current_task(void) +{ + struct mm_struct *mm = current->mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + local_flush_tlb(); + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + preempt_enable(); +} + +void flush_tlb_mm (struct mm_struct * mm) +{ + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if (current->mm) + local_flush_tlb(); + else + leave_mm(smp_processor_id()); + } + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if(current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, va); + + preempt_enable(); +} + +static void do_flush_tlb_all(void* info) +{ + unsigned long cpu = smp_processor_id(); + + __flush_tlb_all(); + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) + leave_mm(cpu); +} + +void flush_tlb_all(void) +{ + on_each_cpu(do_flush_tlb_all, NULL, 1, 1); +} + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +void smp_send_reschedule(int cpu) +{ + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + */ +static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static struct call_data_struct * call_data; + +/* + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. + */ + +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) +/* + * [SUMMARY] Run a function on all other CPUs. + * The function to run. This must be fast and non-blocking. + * An arbitrary pointer to pass to the function. + * currently unused. + * If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. 
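 *
 * As a usage sketch (the callback name here is made up for illustration):
 *
 *	static void say_hello(void *info)
 *	{
 *		printk("hello from CPU%d\n", smp_processor_id());
 *	}
 *
 *	smp_call_function(say_hello, NULL, 0, 1);
 *
 * would run say_hello on every other online CPU and, because wait is 1,
 * return only once they have all finished it.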
+ */ +{ + struct call_data_struct data; + int cpus = num_online_cpus()-1; + + if (!cpus) + return 0; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + call_data = &data; + mb(); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + barrier(); + spin_unlock(&call_lock); + + return 0; +} + +static void stop_this_cpu (void * dummy) +{ + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + local_irq_disable(); +#if 1 + printk("stop_this_cpu disable_local_APIC\n"); +#else + disable_local_APIC(); +#endif + if (cpu_data[smp_processor_id()].hlt_works_ok) + for(;;) __asm__("hlt"); + for (;;); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); + + local_irq_disable(); +#if 1 + printk("smp_send_stop disable_local_APIC\n"); +#else + disable_local_APIC(); +#endif + local_irq_enable(); +} + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. + */ +asmlinkage void smp_reschedule_interrupt(void) +{ +#if 1 + printk("smp_reschedule_interrupt: ack_APIC_irq\n"); +#else + ack_APIC_irq(); +#endif +} + +asmlinkage void smp_call_function_interrupt(void) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + +#if 1 + printk("smp_call_function_interrupt: ack_APIC_irq\n"); +#else + ack_APIC_irq(); +#endif + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + irq_enter(); + (*func)(info); + irq_exit(); + + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } +} + diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c new file mode 100644 index 0000000000..6558024935 --- /dev/null +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c @@ -0,0 +1,1209 @@ +/* + * x86 SMP booting functions + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * Thanks to Intel for making available several different Pentium, + * Pentium Pro and Pentium-II/Xeon MP machines. + * Original development of Linux SMP code supported by Caldera. + * + * This code is released under the GNU General Public License version 2 or + * later. + * + * Fixes + * Felix Koop : NR_CPUS used properly + * Jose Renau : Handle single CPU case. + * Alan Cox : By repeated request 8) - Total BogoMIPS report. + * Greg Wright : Fix for kernel stacks panic. + * Erich Boleyn : MP v1.4 and additional changes. + * Matthias Sattler : Changes for 2.1 kernel map. + * Michel Lespinasse : Changes for 2.1 kernel map. + * Michael Chastain : Change trampoline.S to gnu as. 
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine + * Ingo Molnar : Added APIC timers, based on code + * from Jose Renau + * Ingo Molnar : various cleanups and rewrites + * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. Rozycki : Bits for genuine 82489DX APICs + * Martin J. Bligh : Added support for multi-quad systems + * Dave Jones : Report invalid combinations of Athlon CPUs. +* Rusty Russell : Hacked into shape for new "hotplug" boot process. */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if 0 +#include +#endif +#include +#include + +#if 0 +/* Set if we find a B stepping CPU */ +static int __initdata smp_b_stepping; +#endif + +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ + +/* bitmap of online cpus */ +cpumask_t cpu_online_map; + +static cpumask_t cpu_callin_map; +cpumask_t cpu_callout_map; +static cpumask_t smp_commenced_mask; + +/* Per CPU bogomips and other parameters */ +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; + +u8 x86_cpu_to_apicid[NR_CPUS] = + { [0 ... NR_CPUS-1] = 0xff }; +EXPORT_SYMBOL(x86_cpu_to_apicid); + +/* Set when the idlers are all forked */ +int smp_threads_ready; + +#if 0 +/* + * Trampoline 80x86 program as an array. + */ + +extern unsigned char trampoline_data []; +extern unsigned char trampoline_end []; +static unsigned char *trampoline_base; +static int trampoline_exec; + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. + */ + +static unsigned long __init setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} +#endif + +/* + * We are called very early to get the low memory for the + * SMP bootup trampoline page. + */ +void __init smp_alloc_memory(void) +{ +#if 1 + printk("smp_alloc_memory\n"); +#else + trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + if (__pa(trampoline_base) >= 0x9F000) + BUG(); + /* + * Make the SMP trampoline executable: + */ + trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); +#endif +} + +/* + * The bootstrap kernel entry code has set these up. Save them for + * a given CPU + */ + +#if 0 +static void __init smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c = cpu_data + id; + + *c = boot_cpu_data; + if (id!=0) + identify_cpu(c); + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. + */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + + /* Athlon 660/661 is valid. */ + if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model==7) && (c->x86_mask==0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability bit. + * It's worth noting that the A5 stepping (662) of some Athlon XP's + * have the MP bit set. 
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. + */ + if (((c->x86_model==6) && (c->x86_mask>=2)) || + ((c->x86_model==7) && (c->x86_mask>=1)) || + (c->x86_model> 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, it's not a certified SMP capable AMD system. */ + tainted |= TAINT_UNSAFE_SMP; + } + +valid_k7: + ; +} +#endif + +#if 0 +/* + * TSC synchronization. + * + * We first check whether all CPUs have their TSC's synchronized, + * then we print a warning if not, and always resync. + */ + +static atomic_t tsc_start_flag = ATOMIC_INIT(0); +static atomic_t tsc_count_start = ATOMIC_INIT(0); +static atomic_t tsc_count_stop = ATOMIC_INIT(0); +static unsigned long long tsc_values[NR_CPUS]; + +#define NR_LOOPS 5 + +static void __init synchronize_tsc_bp (void) +{ + int i; + unsigned long long t0; + unsigned long long sum, avg; + long long delta; + unsigned long one_usec; + int buggy = 0; + + printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); + + /* convert from kcyc/sec to cyc/usec */ + one_usec = cpu_khz / 1000; + + atomic_set(&tsc_start_flag, 1); + wmb(); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronized and + * the BP and APs set their cycle counters to zero all at + * once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. + */ + for (i = 0; i < NR_LOOPS; i++) { + /* + * all APs synchronize but they loop on '== num_cpus' + */ + while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) + mb(); + atomic_set(&tsc_count_stop, 0); + wmb(); + /* + * this lets the APs save their current TSC: + */ + atomic_inc(&tsc_count_start); + + rdtscll(tsc_values[smp_processor_id()]); + /* + * We clear the TSC in the last loop: + */ + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + /* + * Wait for all APs to leave the synchronization point: + */ + while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) + mb(); + atomic_set(&tsc_count_start, 0); + wmb(); + atomic_inc(&tsc_count_stop); + } + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_isset(i, cpu_callout_map)) { + t0 = tsc_values[i]; + sum += t0; + } + } + avg = sum; + do_div(avg, num_booting_cpus()); + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + delta = tsc_values[i] - avg; + if (delta < 0) + delta = -delta; + /* + * We report bigger than 2 microseconds clock differences. 
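 *
 * (Concretely: one_usec = cpu_khz / 1000, so on a 2 GHz part one_usec is
 *  2000 cycles, and only TSC deltas above 4000 cycles are reported by the
 *  check below.)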
+ */ + if (delta > 2*one_usec) { + long realdelta; + if (!buggy) { + buggy = 1; + printk("\n"); + } + realdelta = delta; + do_div(realdelta, one_usec); + if (tsc_values[i] < avg) + realdelta = -realdelta; + + printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); + } + + sum += delta; + } + if (!buggy) + printk("passed.\n"); +} + +static void __init synchronize_tsc_ap (void) +{ + int i; + + /* + * Not every cpu is online at the time + * this gets called, so we first wait for the BP to + * finish SMP initialization: + */ + while (!atomic_read(&tsc_start_flag)) mb(); + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&tsc_count_start); + while (atomic_read(&tsc_count_start) != num_booting_cpus()) + mb(); + + rdtscll(tsc_values[smp_processor_id()]); + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + } +} +#undef NR_LOOPS + +extern void calibrate_delay(void); + +static atomic_t init_deasserted; +#endif + +void __init smp_callin(void) +{ +#if 1 + printk("smp_callin\n"); +#else + int cpuid, phys_id; + unsigned long timeout; + + /* + * If waken up by an INIT in an 82489DX configuration + * we may get here before an INIT-deassert IPI reaches + * our local APIC. We have to wait for the IPI or we'll + * lock up on an APIC access. + */ + wait_for_init_deassert(&init_deasserted); + + /* + * (This works even if the APIC is not enabled.) + */ + phys_id = GET_APIC_ID(apic_read(APIC_ID)); + cpuid = smp_processor_id(); + if (cpu_isset(cpuid, cpu_callin_map)) { + printk("huh, phys CPU#%d, CPU#%d already present??\n", + phys_id, cpuid); + BUG(); + } + Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); + + /* + * STARTUP IPIs are fragile beasts as they might sometimes + * trigger some glue motherboard logic. Complete APIC bus + * silence for 1 second, this overestimates the time the + * boot CPU is spending to send the up to 2 STARTUP IPIs + * by a factor of two. This should be enough. + */ + + /* + * Waiting 2s total for startup (udelay is not yet working) + */ + timeout = jiffies + 2*HZ; + while (time_before(jiffies, timeout)) { + /* + * Has the boot CPU finished it's STARTUP sequence? + */ + if (cpu_isset(cpuid, cpu_callout_map)) + break; + rep_nop(); + } + + if (!time_before(jiffies, timeout)) { + printk("BUG: CPU%d started up but did not get a callout!\n", + cpuid); + BUG(); + } + + /* + * the boot CPU has finished the init stage and is spinning + * on callin_map until we finish. We are free to set up this + * CPU, first the APIC. (this is probably redundant on most + * boards) + */ + + Dprintk("CALLIN, before setup_local_APIC().\n"); + smp_callin_clear_local_apic(); + setup_local_APIC(); + map_cpu_to_logical_apicid(); + + local_irq_enable(); + + /* + * Get our bogomips. + */ + calibrate_delay(); + Dprintk("Stack at about %p\n",&cpuid); + + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + + disable_APIC_timer(); + local_irq_disable(); + /* + * Allow the master to continue. + */ + cpu_set(cpuid, cpu_callin_map); + + /* + * Synchronize the TSC with the BP + */ + if (cpu_has_tsc && cpu_khz) + synchronize_tsc_ap(); +#endif +} + +int cpucount; + +extern int cpu_idle(void); + +/* + * Activate a secondary processor. 
+ */ +int __init start_secondary(void *unused) +{ +#if 1 + printk("start_secondary\n"); + return cpu_idle(); +#else + /* + * Dont put anything before smp_callin(), SMP + * booting is too fragile that we want to limit the + * things done here to the most necessary things. + */ + cpu_init(); + smp_callin(); + while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) + rep_nop(); + setup_secondary_APIC_clock(); + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + enable_NMI_through_LVT0(NULL); + enable_8259A_irq(0); + } + enable_APIC_timer(); + /* + * low-memory mappings have been cleared, flush them from + * the local TLBs too. + */ + local_flush_tlb(); + cpu_set(smp_processor_id(), cpu_online_map); + wmb(); + return cpu_idle(); +#endif +} + +/* + * Everything has been set up for the secondary + * CPUs - they just need to reload everything + * from the task structure + * This function must not return. + */ +void __init initialize_secondary(void) +{ + /* + * We don't actually need to load the full TSS, + * basically just the stack pointer and the eip. + */ + + asm volatile( + "movl %0,%%esp\n\t" + "jmp *%1" + : + :"r" (current->thread.esp),"r" (current->thread.eip)); +} + +extern struct { + void * esp; + unsigned short ss; +} stack_start; + +#ifdef CONFIG_NUMA + +/* which logical CPUs are on which nodes */ +cpumask_t node_2_cpu_mask[MAX_NUMNODES] = + { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; +/* which node each logical CPU is on */ +int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +EXPORT_SYMBOL(cpu_2_node); + +/* set up a mapping between cpu and node. */ +static inline void map_cpu_to_node(int cpu, int node) +{ + printk("Mapping cpu %d to node %d\n", cpu, node); + cpu_set(cpu, node_2_cpu_mask[node]); + cpu_2_node[cpu] = node; +} + +/* undo a mapping between cpu and node. */ +static inline void unmap_cpu_to_node(int cpu) +{ + int node; + + printk("Unmapping cpu %d from all nodes\n", cpu); + for (node = 0; node < MAX_NUMNODES; node ++) + cpu_clear(cpu, node_2_cpu_mask[node]); + cpu_2_node[cpu] = 0; +} +#else /* !CONFIG_NUMA */ + +#define map_cpu_to_node(cpu, node) ({}) +#define unmap_cpu_to_node(cpu) ({}) + +#endif /* CONFIG_NUMA */ + +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; + +void map_cpu_to_logical_apicid(void) +{ +#if 1 + printk("map_cpu_to_logical_apicid\n"); +#else + int cpu = smp_processor_id(); + int apicid = logical_smp_processor_id(); + + cpu_2_logical_apicid[cpu] = apicid; + map_cpu_to_node(cpu, apicid_to_node(apicid)); +#endif +} + +void unmap_cpu_to_logical_apicid(int cpu) +{ + cpu_2_logical_apicid[cpu] = BAD_APICID; + unmap_cpu_to_node(cpu); +} + +#if APIC_DEBUG +static inline void __inquire_remote_apic(int apicid) +{ + int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + char *names[] = { "ID", "VERSION", "SPIV" }; + int timeout, status; + + printk("Inquiring remote APIC #%d...\n", apicid); + + for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + printk("... APIC #%d %s: ", apicid, names[i]); + + /* + * Wait for idle. 
+ */ + apic_wait_icr_idle(); + + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + + timeout = 0; + do { + udelay(100); + status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; + } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); + + switch (status) { + case APIC_ICR_RR_VALID: + status = apic_read(APIC_RRR); + printk("%08x\n", status); + break; + default: + printk("failed\n"); + } + } +} +#endif + +#if 0 +#ifdef WAKE_SECONDARY_VIA_NMI +/* + * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal + * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this + * won't ... remember to clear down the APIC, etc later. + */ +static int __init +wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) +{ + unsigned long send_status = 0, accept_status = 0; + int timeout, maxlvt; + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + maxlvt = get_maxlvt(); + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + Dprintk("NMI sent.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} +#endif /* WAKE_SECONDARY_VIA_NMI */ + +#ifdef WAKE_SECONDARY_VIA_INIT +static int __init +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +{ +#if 1 + printk("wakeup_secondary_cpu\n"); + return 0; +#else + unsigned long send_status = 0, accept_status = 0; + int maxlvt, timeout, num_starts, j; + + /* + * Be paranoid about clearing APIC errors. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } + + Dprintk("Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* + * Send IPI + */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + mdelay(10); + + Dprintk("Deasserting INIT.\n"); + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Send IPI */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + atomic_set(&init_deasserted, 1); + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't send the STARTUP IPIs. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) + num_starts = 2; + else + num_starts = 0; + + /* + * Run STARTUP IPI loop. 
+ */ + Dprintk("#startup loops: %d.\n", num_starts); + + maxlvt = get_maxlvt(); + + for (j = 1; j <= num_starts; j++) { + Dprintk("Sending STARTUP #%d.\n",j); + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + Dprintk("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(300); + + Dprintk("Startup point 1.\n"); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + Dprintk("After Startup.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +#endif +} +#endif /* WAKE_SECONDARY_VIA_INIT */ +#endif + +extern cpumask_t cpu_initialized; + +#if 0 +static int __init do_boot_cpu(int apicid) +/* + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad + * (ie clustered apic addressing mode), this is a LOGICAL apic ID. + * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. + */ +{ + struct task_struct *idle; + unsigned long boot_error; + int timeout, cpu; + unsigned long start_eip; + unsigned short nmi_high = 0, nmi_low = 0; + + cpu = ++cpucount; + /* + * We can't use kernel_thread since we must avoid to + * reschedule the child. + */ + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + idle->thread.eip = (unsigned long) start_secondary; + /* start_eip had better be page-aligned! */ + start_eip = setup_trampoline(); + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); + /* Stack for startup_32 can be just as for start_secondary onwards */ + stack_start.esp = (void *) idle->thread.esp; + + irq_ctx_init(cpu); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + atomic_set(&init_deasserted, 0); + + Dprintk("Setting warm reset code and vector.\n"); + + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_eip); + + /* + * Starting actual IPI sequence... + */ + boot_error = wakeup_secondary_cpu(apicid, start_eip); + + if (!boot_error) { + /* + * allow APs to start initializing. + */ + Dprintk("Before Callout %d.\n", cpu); + cpu_set(cpu, cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (cpu_isset(cpu, cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (cpu_isset(cpu, cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + Dprintk("OK.\n"); + printk("CPU%d: ", cpu); + print_cpu_info(&cpu_data[cpu]); + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; + if (*((volatile unsigned char *)trampoline_base) + == 0xA5) + /* trampoline started but...? 
*/ + printk("Stuck ??\n"); + else + /* trampoline code not run */ + printk("Not responding.\n"); + inquire_remote_apic(apicid); + } + } + x86_cpu_to_apicid[cpu] = apicid; + if (boot_error) { + /* Try to put things back the way they were before ... */ + unmap_cpu_to_logical_apicid(cpu); + cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ + cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ + cpucount--; + } + + /* mark "stuck" area as not stuck */ + *((volatile unsigned long *)trampoline_base) = 0; + + return boot_error; +} + +cycles_t cacheflush_time; +#endif +unsigned long cache_decay_ticks; +#if 0 + +static void smp_tune_scheduling (void) +{ + unsigned long cachesize; /* kB */ + unsigned long bandwidth = 350; /* MB/s */ + /* + * Rough estimation for SMP scheduling, this is the number of + * cycles it takes for a fully memory-limited process to flush + * the SMP-local cache. + * + * (For a P5 this pretty much means we will choose another idle + * CPU almost always at wakeup time (this is due to the small + * L1 cache), on PIIs it's around 50-100 usecs, depending on + * the cache size) + */ + + if (!cpu_khz) { + /* + * this basically disables processor-affinity + * scheduling on SMP without a TSC. + */ + cacheflush_time = 0; + return; + } else { + cachesize = boot_cpu_data.x86_cache_size; + if (cachesize == -1) { + cachesize = 16; /* Pentiums, 2x8kB cache */ + bandwidth = 100; + } + + cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth; + } + + cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1; + + printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", + (long)cacheflush_time/(cpu_khz/1000), + ((long)cacheflush_time*100/(cpu_khz/1000)) % 100); + printk("task migration cache decay timeout: %ld msecs.\n", + cache_decay_ticks); +} + +/* + * Cycle through the processors sending APIC IPIs to boot each. + */ + +static int boot_cpu_logical_apicid; +#endif +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio; + +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; + +static void __init smp_boot_cpus(unsigned int max_cpus) +{ +#if 1 + printk("smp_boot_cpus %d\n", max_cpus); +#else + int apicid, cpu, bit, kicked; + unsigned long bogosum = 0; + + /* + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ + printk("CPU%d: ", 0); + print_cpu_info(&cpu_data[0]); + + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_logical_apicid = logical_smp_processor_id(); + x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; + + current_thread_info()->cpu = 0; + smp_tune_scheduling(); + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); + + /* + * If we couldn't find an SMP configuration at boot time, + * get out of here now! + */ + if (!smp_found_config && !acpi_lapic) { + printk(KERN_NOTICE "SMP motherboard not detected.\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + if (APIC_init_uniprocessor()) + printk(KERN_NOTICE "Local APIC not detected." + " Using dummy APIC emulation.\n"); + map_cpu_to_logical_apicid(); + return; + } + + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. 
+ * Makes no sense to do this check in clustered apic mode, so skip it + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + return; + } + + verify_local_APIC(); + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + smp_found_config = 0; + printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + return; + } + + connect_bsp_APIC(); + setup_local_APIC(); + map_cpu_to_logical_apicid(); + + + setup_portio_remap(); + + /* + * Scan the CPU present map and fire up the other CPUs via do_boot_cpu + * + * In clustered apic mode, phys_cpu_present_map is a constructed thus: + * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the + * clustered apic ID. + */ + Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map)); + + kicked = 1; + for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) { + apicid = cpu_present_to_apicid(bit); + /* + * Don't even attempt to start the boot CPU! + */ + if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID)) + continue; + + if (!check_apicid_present(bit)) + continue; + if (max_cpus <= cpucount+1) + continue; + + if (do_boot_cpu(apicid)) + printk("CPU #%d not responding - cannot use it.\n", + apicid); + else + ++kicked; + } + + /* + * Cleanup possible dangling ends... + */ + smpboot_restore_warm_reset_vector(); + + /* + * Allow the user to impress friends. + */ + Dprintk("Before bogomips.\n"); + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_isset(cpu, cpu_callout_map)) + bogosum += cpu_data[cpu].loops_per_jiffy; + printk(KERN_INFO + "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + cpucount+1, + bogosum/(500000/HZ), + (bogosum/(5000/HZ))%100); + + Dprintk("Before bogocount - setting activated=1.\n"); + + if (smp_b_stepping) + printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + if (tainted & TAINT_UNSAFE_SMP) { + if (cpucount) + printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); + else + tainted &= ~TAINT_UNSAFE_SMP; + } + + Dprintk("Boot done.\n"); + + /* + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. 
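 *
 * (For example, with smp_num_siblings == 2 the two hyperthreaded logical
 *  CPUs of a package share one phys_proc_id, so the loop below places
 *  each of them in the other's cpu_sibling_map.)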
+ */ + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpus_clear(cpu_sibling_map[cpu]); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + + if (smp_num_siblings > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (phys_proc_id[cpu] == phys_proc_id[i]) { + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); + } + } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + check_nmi_watchdog(); + + smpboot_setup_io_apic(); + + setup_boot_APIC_clock(); + + /* + * Synchronize the TSC with the AP + */ + if (cpu_has_tsc && cpucount && cpu_khz) + synchronize_tsc_bp(); +#endif +} + +/* These are wrappers to interface to the new boot process. Someone + who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + smp_boot_cpus(max_cpus); +} + +void __devinit smp_prepare_boot_cpu(void) +{ + cpu_set(smp_processor_id(), cpu_online_map); + cpu_set(smp_processor_id(), cpu_callout_map); +} + +int __devinit __cpu_up(unsigned int cpu) +{ + /* This only works at boot for x86. See "rewrite" above. */ + if (cpu_isset(cpu, smp_commenced_mask)) { + local_irq_enable(); + return -ENOSYS; + } + + /* In case one didn't come up */ + if (!cpu_isset(cpu, cpu_callin_map)) { + local_irq_enable(); + return -EIO; + } + + local_irq_enable(); + /* Unleash the CPU! */ + cpu_set(cpu, smp_commenced_mask); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +#if 1 + printk("smp_cpus_done %d\n", max_cpus); +#else +#ifdef CONFIG_X86_IO_APIC + setup_ioapic_dest(); +#endif + zap_low_mappings(); + /* + * Disable executability of the SMP trampoline: + */ + set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif +} + +void __init smp_intr_init(void) +{ +#if 1 + printk("smp_intr_init\n"); +#else + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. 
+ */ + set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +#endif +} diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/time.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/time.c index 1edc1cbf83..3a7360245c 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/time.c +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/time.c @@ -401,6 +401,11 @@ static inline void do_timer_interrupt(int irq, void *dev_id, delta -= NS_PER_TICK; processed_system_time += NS_PER_TICK; do_timer(regs); +#ifdef CONFIG_SMP + if (regs) /* XXXsmp this needs to be done on every cpu + * - every tick - maybe */ + update_process_times(user_mode(regs)); +#endif if (regs) profile_tick(CPU_PROFILING, regs); } diff --git a/linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile b/linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile index eef477fe3e..65817ef595 100644 --- a/linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile @@ -10,4 +10,4 @@ $(obj)/vmlinux.lds: extra-y += vmlinux.lds obj-y := ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o empty.o \ - gnttab.o skbuff.o + gnttab.o skbuff.o smp.o diff --git a/linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c new file mode 100644 index 0000000000..51addc6c76 --- /dev/null +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c @@ -0,0 +1,19 @@ +/* Copyright (C) 2004, Christian Limpach */ + +#include +#include +#include + +unsigned int __initdata maxcpus = NR_CPUS; + + +/* + * the frequency of the profiling timer can be changed + * by writing a multiplier value into /proc/profile. + */ +int setup_profiling_timer(unsigned int multiplier) +{ + printk("setup_profiling_timer\n"); + + return 0; +} diff --git a/linux-2.6.9-xen-sparse/drivers/xen/console/console.c b/linux-2.6.9-xen-sparse/drivers/xen/console/console.c index b293961363..78ec3c0667 100644 --- a/linux-2.6.9-xen-sparse/drivers/xen/console/console.c +++ b/linux-2.6.9-xen-sparse/drivers/xen/console/console.c @@ -238,7 +238,7 @@ void xencons_force_flush(void) * We use dangerous control-interface functions that require a quiescent * system and no interrupts. Try to ensure this with a global cli(). */ - cli(); + local_irq_disable(); /* XXXsmp */ /* Spin until console data is flushed through to the domain controller. */ while ( (wc != wp) && !ctrl_if_transmitter_empty() ) diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h index b8384efc53..270dbcecb4 100644 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h @@ -77,7 +77,6 @@ * the usable vector space is 0x20-0xff (224 vectors) */ -#if 0 /* * The maximum number of vectors supported by i386 processors * is limited to 256. 
For processors other than i386, NR_VECTORS @@ -85,6 +84,7 @@ */ #define NR_VECTORS 256 +#if 0 #ifdef CONFIG_PCI_USE_VECTOR #define NR_IRQS FIRST_SYSTEM_VECTOR #define NR_IRQ_VECTORS NR_IRQS diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h new file mode 100644 index 0000000000..9885c24ab9 --- /dev/null +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h @@ -0,0 +1,56 @@ +/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws + * which needs to alter them. */ + +static inline void smpboot_clear_io_apic_irqs(void) +{ +#if 1 + printk("smpboot_clear_io_apic_irqs\n"); +#else + io_apic_irqs = 0; +#endif +} + +static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) +{ +#if 1 + printk("smpboot_setup_warm_reset_vector\n"); +#else + CMOS_WRITE(0xa, 0xf); + local_flush_tlb(); + Dprintk("1.\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + Dprintk("2.\n"); + *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + Dprintk("3.\n"); +#endif +} + +static inline void smpboot_restore_warm_reset_vector(void) +{ + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + CMOS_WRITE(0, 0xf); + + *((volatile long *) phys_to_virt(0x467)) = 0; +} + +static inline void smpboot_setup_io_apic(void) +{ +#if 1 + printk("smpboot_setup_io_apic\n"); +#else + /* + * Here we can be sure that there is an IO-APIC in the system. Let's + * go and set it up: + */ + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif +} diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/msr.h b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/msr.h index 4cc4f318ba..8bd09e21f0 100644 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/msr.h +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/msr.h @@ -1,7 +1,6 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H -#include #include /* @@ -10,12 +9,14 @@ * pointer indirection), this allows gcc to optimize better */ +extern int get_smp_processor_id(void); + #define rdmsr(_msr,_val1,_val2) do { \ dom0_op_t op; \ op.cmd = DOM0_MSR; \ op.u.msr.write = 0; \ op.u.msr.msr = (_msr); \ - op.u.msr.cpu_mask = (1 << smp_processor_id()); \ + op.u.msr.cpu_mask = (1 << get_smp_processor_id()); \ HYPERVISOR_dom0_op(&op); \ (_val1) = op.u.msr.out1; \ (_val2) = op.u.msr.out2; \ @@ -25,7 +26,7 @@ dom0_op_t op; \ op.cmd = DOM0_MSR; \ op.u.msr.write = 1; \ - op.u.msr.cpu_mask = (1 << smp_processor_id()); \ + op.u.msr.cpu_mask = (1 << get_smp_processor_id()); \ op.u.msr.msr = (_msr); \ op.u.msr.in1 = (_val1); \ op.u.msr.in2 = (_val2); \ -- 2.30.2
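
A note on the msr.h hunk above: rdmsr()/wrmsr() now go through the
out-of-line get_smp_processor_id() helper added to cpu/common.c,
presumably so that msr.h no longer has to pull in the headers behind the
smp_processor_id() macro. Under that reading, the patched rdmsr() expands
roughly as in this sketch (the MSR number is an arbitrary example):

	dom0_op_t op;

	op.cmd            = DOM0_MSR;
	op.u.msr.write    = 0;                           /* a read, not a write */
	op.u.msr.msr      = 0x10;                        /* e.g. the TSC MSR */
	op.u.msr.cpu_mask = 1 << get_smp_processor_id(); /* only this CPU */
	HYPERVISOR_dom0_op(&op);                         /* Xen performs the access */
	/* low half lands in op.u.msr.out1, high half in op.u.msr.out2 */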